{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 2: Pattern Refinement\n",
    "\n",
    "\n",
    "1. [Compare models](#import)\n",
    "2. [Identify representative documents (Table 5)](#identify)\n",
    "\n",
    "This code produces the output used in Table 5, and presents a workflow to carry out the second step in the computational grounded theory workflow: guided deep reading."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='import'></a>\n",
    "## Import Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, read in the Structural Topic Model data and create a Pandas df with desired model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#enable use of R in a Python kernel\n",
    "%load_ext rpy2.ipython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:186: RRuntimeWarning: stm v1.1.3 (2016-01-14) successfully loaded. See ?stm for help.\n",
      "\n",
      "  warnings.warn(x, RRuntimeWarning)\n"
     ]
    }
   ],
   "source": [
    "%%R\n",
    "library(stm)\n",
    "\n",
    "#load the saved STM\n",
    "load(\"../data/stm_all.RData\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%R -o df_all \n",
    "#The above outputs the R variable df_all for use in Python cells below\n",
    "\n",
    "#merge theta onto original dataset\n",
    "meta$ID <- seq.int(nrow(meta))\n",
    "theta <- data.frame(mod.40$theta)\n",
    "theta$ID <- seq.int(nrow(theta))\n",
    "df_all <- merge(meta, theta)\n",
    "df_all['ID'] <- NULL\n",
    "df_all['X'] <- NULL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>doc</th>\n",
       "      <th>city</th>\n",
       "      <th>publication</th>\n",
       "      <th>date</th>\n",
       "      <th>word_count</th>\n",
       "      <th>org</th>\n",
       "      <th>identifier</th>\n",
       "      <th>wave</th>\n",
       "      <th>text_string</th>\n",
       "      <th>X1</th>\n",
       "      <th>...</th>\n",
       "      <th>X31</th>\n",
       "      <th>X32</th>\n",
       "      <th>X33</th>\n",
       "      <th>X34</th>\n",
       "      <th>X35</th>\n",
       "      <th>X36</th>\n",
       "      <th>X37</th>\n",
       "      <th>X38</th>\n",
       "      <th>X39</th>\n",
       "      <th>X40</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>notessecondyear_70.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>553</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1 1 1 1 10 11 2 2 2 2 3 3 3 4 5 6 7 8 9 A An...</td>\n",
       "      <td>0.000009</td>\n",
       "      <td>...</td>\n",
       "      <td>9.839842e-07</td>\n",
       "      <td>1.977760e-04</td>\n",
       "      <td>0.000535</td>\n",
       "      <td>0.003818</td>\n",
       "      <td>1.121387e-05</td>\n",
       "      <td>2.214182e-08</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>2.896903e-03</td>\n",
       "      <td>1.724483e-04</td>\n",
       "      <td>2.217323e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chicago.cwlu_womankind.1971.11.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>890</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>411 93 Actually Alice American American Any As...</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>...</td>\n",
       "      <td>1.174310e-06</td>\n",
       "      <td>9.718309e-04</td>\n",
       "      <td>0.000185</td>\n",
       "      <td>0.000362</td>\n",
       "      <td>3.533782e-04</td>\n",
       "      <td>1.145776e-03</td>\n",
       "      <td>0.307760</td>\n",
       "      <td>8.900101e-04</td>\n",
       "      <td>9.877170e-03</td>\n",
       "      <td>4.152077e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>nyc.masses_1916.04.21.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1916</td>\n",
       "      <td>425</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>All Anarchist Anarchist And Birth Birth Birth ...</td>\n",
       "      <td>0.000005</td>\n",
       "      <td>...</td>\n",
       "      <td>1.599000e-02</td>\n",
       "      <td>1.085408e-04</td>\n",
       "      <td>0.103392</td>\n",
       "      <td>0.000093</td>\n",
       "      <td>7.041272e-05</td>\n",
       "      <td>1.025539e-05</td>\n",
       "      <td>0.000181</td>\n",
       "      <td>1.981448e-04</td>\n",
       "      <td>1.036758e-03</td>\n",
       "      <td>1.956936e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>nyc.redstockings.1973.mainardi.marriagequestio...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>972</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1968 1968 50s 60s Although Although American A...</td>\n",
       "      <td>0.000610</td>\n",
       "      <td>...</td>\n",
       "      <td>5.621154e-05</td>\n",
       "      <td>4.041277e-03</td>\n",
       "      <td>0.001754</td>\n",
       "      <td>0.003146</td>\n",
       "      <td>5.095332e-06</td>\n",
       "      <td>4.110163e-06</td>\n",
       "      <td>0.000657</td>\n",
       "      <td>5.475914e-03</td>\n",
       "      <td>3.231665e-03</td>\n",
       "      <td>3.159558e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>chicago.cwlu_womankind.1972.01.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>39</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>1972 5 Ghots I January Womankind a bind by cro...</td>\n",
       "      <td>0.000417</td>\n",
       "      <td>...</td>\n",
       "      <td>6.662805e-05</td>\n",
       "      <td>5.592332e-04</td>\n",
       "      <td>0.015263</td>\n",
       "      <td>0.000041</td>\n",
       "      <td>1.111954e-02</td>\n",
       "      <td>4.302147e-03</td>\n",
       "      <td>0.396293</td>\n",
       "      <td>3.225515e-03</td>\n",
       "      <td>1.853908e-03</td>\n",
       "      <td>7.546477e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>notesfirstyear_30.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notesfirstyear</td>\n",
       "      <td>1968</td>\n",
       "      <td>442</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>12 12 15 1868 1868 1968 28 A AUNT All Anybody ...</td>\n",
       "      <td>0.000198</td>\n",
       "      <td>...</td>\n",
       "      <td>1.427645e-04</td>\n",
       "      <td>3.033551e-02</td>\n",
       "      <td>0.017309</td>\n",
       "      <td>0.002739</td>\n",
       "      <td>6.336636e-05</td>\n",
       "      <td>6.437441e-05</td>\n",
       "      <td>0.030969</td>\n",
       "      <td>9.519957e-03</td>\n",
       "      <td>3.588646e-02</td>\n",
       "      <td>5.097306e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>chicago.cwlu_womankind.1972.05.14.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>976</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1970 2 2 3 4 4 5 6 7 8 A AT Also Also Amer A...</td>\n",
       "      <td>0.002649</td>\n",
       "      <td>...</td>\n",
       "      <td>1.073067e-05</td>\n",
       "      <td>2.910664e-05</td>\n",
       "      <td>0.000167</td>\n",
       "      <td>0.000471</td>\n",
       "      <td>8.218961e-02</td>\n",
       "      <td>4.336913e-01</td>\n",
       "      <td>0.000287</td>\n",
       "      <td>7.205336e-03</td>\n",
       "      <td>2.120207e-04</td>\n",
       "      <td>4.333721e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.programforcons...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>785</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>1 2 3 A A A APPENDIX And CONSCIOUSNESSRAISING ...</td>\n",
       "      <td>0.000005</td>\n",
       "      <td>...</td>\n",
       "      <td>1.564634e-06</td>\n",
       "      <td>1.118471e-04</td>\n",
       "      <td>0.000506</td>\n",
       "      <td>0.001910</td>\n",
       "      <td>2.077238e-05</td>\n",
       "      <td>2.165445e-07</td>\n",
       "      <td>0.000150</td>\n",
       "      <td>2.214874e-03</td>\n",
       "      <td>1.769736e-02</td>\n",
       "      <td>1.491251e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>chicago.cwlu_womankind.1972.11.11.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>985</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1867 1972 A AND ARTICLES Adopt Affiar All Amaz...</td>\n",
       "      <td>0.000512</td>\n",
       "      <td>...</td>\n",
       "      <td>1.620817e-05</td>\n",
       "      <td>1.483889e-02</td>\n",
       "      <td>0.044229</td>\n",
       "      <td>0.000432</td>\n",
       "      <td>2.356348e-04</td>\n",
       "      <td>8.004961e-06</td>\n",
       "      <td>0.598019</td>\n",
       "      <td>6.536029e-03</td>\n",
       "      <td>4.789977e-04</td>\n",
       "      <td>3.526054e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.20.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>369</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>Above CWLU CWLU CWLU Chicago Chicago Discus Li...</td>\n",
       "      <td>0.000227</td>\n",
       "      <td>...</td>\n",
       "      <td>2.225532e-05</td>\n",
       "      <td>3.113088e-04</td>\n",
       "      <td>0.002405</td>\n",
       "      <td>0.000890</td>\n",
       "      <td>5.669615e-04</td>\n",
       "      <td>3.215436e-05</td>\n",
       "      <td>0.000443</td>\n",
       "      <td>3.157324e-03</td>\n",
       "      <td>3.696998e-04</td>\n",
       "      <td>2.198869e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>chicago.cwlu_womankind.1972.01.12.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>938</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>10 12volt 15 6volt 6volt 6volt A A Another As ...</td>\n",
       "      <td>0.609806</td>\n",
       "      <td>...</td>\n",
       "      <td>2.187946e-06</td>\n",
       "      <td>2.188952e-04</td>\n",
       "      <td>0.000195</td>\n",
       "      <td>0.000185</td>\n",
       "      <td>3.941418e-06</td>\n",
       "      <td>6.729796e-04</td>\n",
       "      <td>0.000022</td>\n",
       "      <td>4.303980e-06</td>\n",
       "      <td>2.919611e-06</td>\n",
       "      <td>1.241344e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>chicago.cwlu_womankind.1972.09.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1591</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "      <td>1930s 1930s 1auqhter 30s 5th 6th 7871786 A Act...</td>\n",
       "      <td>0.002045</td>\n",
       "      <td>...</td>\n",
       "      <td>4.415765e-04</td>\n",
       "      <td>2.306720e-04</td>\n",
       "      <td>0.022810</td>\n",
       "      <td>0.000429</td>\n",
       "      <td>4.670267e-01</td>\n",
       "      <td>3.282751e-04</td>\n",
       "      <td>0.000316</td>\n",
       "      <td>5.848442e-02</td>\n",
       "      <td>4.610525e-03</td>\n",
       "      <td>8.258727e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>chicago.cwlu_womankind.1973.04.16.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>689</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>13</td>\n",
       "      <td>2</td>\n",
       "      <td>25 29 2ND 4 656 A Any Any April As Barry But C...</td>\n",
       "      <td>0.003974</td>\n",
       "      <td>...</td>\n",
       "      <td>1.693714e-04</td>\n",
       "      <td>2.471187e-04</td>\n",
       "      <td>0.003164</td>\n",
       "      <td>0.361153</td>\n",
       "      <td>5.016687e-02</td>\n",
       "      <td>2.265492e-03</td>\n",
       "      <td>0.000475</td>\n",
       "      <td>6.066289e-04</td>\n",
       "      <td>3.293610e-05</td>\n",
       "      <td>1.106561e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>chicago.cwlu_womankind.1972.12.08.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1130</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>14</td>\n",
       "      <td>2</td>\n",
       "      <td>10 1020 1161 12 1300s 1500 1793 1930s 1950s 19...</td>\n",
       "      <td>0.000046</td>\n",
       "      <td>...</td>\n",
       "      <td>8.282622e-05</td>\n",
       "      <td>3.213172e-07</td>\n",
       "      <td>0.000349</td>\n",
       "      <td>0.000002</td>\n",
       "      <td>2.382573e-04</td>\n",
       "      <td>1.739985e-04</td>\n",
       "      <td>0.000005</td>\n",
       "      <td>9.202287e-07</td>\n",
       "      <td>8.829141e-07</td>\n",
       "      <td>1.417024e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>chicago.cwlu_womankind.1973.11.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>899</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>15</td>\n",
       "      <td>2</td>\n",
       "      <td>14 30 ABORTION AFLCIO AFLCIO AND Abortions Act...</td>\n",
       "      <td>0.000213</td>\n",
       "      <td>...</td>\n",
       "      <td>4.258701e-04</td>\n",
       "      <td>1.257658e-03</td>\n",
       "      <td>0.005907</td>\n",
       "      <td>0.000144</td>\n",
       "      <td>1.375314e-03</td>\n",
       "      <td>4.037606e-05</td>\n",
       "      <td>0.000115</td>\n",
       "      <td>2.322311e-02</td>\n",
       "      <td>1.585904e-04</td>\n",
       "      <td>4.077263e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>chicago.cwlu_womankind.1973.06.08.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1106</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>16</td>\n",
       "      <td>2</td>\n",
       "      <td>112940 1970 1971 1972 1972 1974 26000 3 341840...</td>\n",
       "      <td>0.001097</td>\n",
       "      <td>...</td>\n",
       "      <td>1.057206e-03</td>\n",
       "      <td>2.055740e-03</td>\n",
       "      <td>0.007596</td>\n",
       "      <td>0.000127</td>\n",
       "      <td>4.788954e-04</td>\n",
       "      <td>1.927180e-04</td>\n",
       "      <td>0.000081</td>\n",
       "      <td>3.401634e-04</td>\n",
       "      <td>8.814712e-06</td>\n",
       "      <td>5.756271e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>chicago.cwlu_womankind.1972.06.13.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1573</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>17</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1100015000 13 13 1318 18 1800036000 1972 1eV...</td>\n",
       "      <td>0.000165</td>\n",
       "      <td>...</td>\n",
       "      <td>7.044254e-05</td>\n",
       "      <td>1.355798e-03</td>\n",
       "      <td>0.001558</td>\n",
       "      <td>0.000013</td>\n",
       "      <td>3.585470e-05</td>\n",
       "      <td>6.931998e-09</td>\n",
       "      <td>0.000011</td>\n",
       "      <td>8.414556e-04</td>\n",
       "      <td>8.204866e-05</td>\n",
       "      <td>4.498704e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>chicago.cwlu_womankind.1972.12.05.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1439</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "      <td>1972 2 2 3 4 438211 552 575 79 852 A A A A A A...</td>\n",
       "      <td>0.001080</td>\n",
       "      <td>...</td>\n",
       "      <td>1.061891e-04</td>\n",
       "      <td>2.028123e-03</td>\n",
       "      <td>0.011778</td>\n",
       "      <td>0.001235</td>\n",
       "      <td>2.414092e-05</td>\n",
       "      <td>2.241605e-04</td>\n",
       "      <td>0.000117</td>\n",
       "      <td>1.852975e-03</td>\n",
       "      <td>9.860627e-06</td>\n",
       "      <td>1.692545e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>nyc.redstockings.1973.hanisch.mensliberation-3...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1094</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "      <td>1000 150 375 6 7 74 And Apparently As As Avenu...</td>\n",
       "      <td>0.027984</td>\n",
       "      <td>...</td>\n",
       "      <td>1.345389e-04</td>\n",
       "      <td>8.122124e-03</td>\n",
       "      <td>0.052807</td>\n",
       "      <td>0.002893</td>\n",
       "      <td>1.443225e-05</td>\n",
       "      <td>8.162189e-07</td>\n",
       "      <td>0.010267</td>\n",
       "      <td>4.983195e-03</td>\n",
       "      <td>2.062907e-04</td>\n",
       "      <td>4.618381e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>nyc.redstockings.1973.hanisch.workingconscious...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1011</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>2 3 3 4 4 Although Being But Do Do FROM Gradua...</td>\n",
       "      <td>0.000223</td>\n",
       "      <td>...</td>\n",
       "      <td>4.107019e-06</td>\n",
       "      <td>4.187583e-04</td>\n",
       "      <td>0.003844</td>\n",
       "      <td>0.001389</td>\n",
       "      <td>1.488908e-04</td>\n",
       "      <td>2.062882e-07</td>\n",
       "      <td>0.000388</td>\n",
       "      <td>1.752389e-01</td>\n",
       "      <td>3.812483e-03</td>\n",
       "      <td>2.043010e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>notessecondyear_25.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>974</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>21</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1 1570 2 3 369 4 5 5 50 6 6 7 7 8 92169 95 A...</td>\n",
       "      <td>0.122361</td>\n",
       "      <td>...</td>\n",
       "      <td>2.637828e-05</td>\n",
       "      <td>4.774722e-02</td>\n",
       "      <td>0.001849</td>\n",
       "      <td>0.016906</td>\n",
       "      <td>7.317483e-05</td>\n",
       "      <td>4.864050e-04</td>\n",
       "      <td>0.000775</td>\n",
       "      <td>2.858366e-03</td>\n",
       "      <td>1.434772e-03</td>\n",
       "      <td>9.256029e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.whatwereallywa...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>959</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>22</td>\n",
       "      <td>2</td>\n",
       "      <td>1971 26 All And And And And August Being But C...</td>\n",
       "      <td>0.000858</td>\n",
       "      <td>...</td>\n",
       "      <td>6.155151e-07</td>\n",
       "      <td>1.878060e-03</td>\n",
       "      <td>0.000187</td>\n",
       "      <td>0.000909</td>\n",
       "      <td>1.162320e-05</td>\n",
       "      <td>2.170622e-05</td>\n",
       "      <td>0.000952</td>\n",
       "      <td>2.109324e-01</td>\n",
       "      <td>8.855499e-04</td>\n",
       "      <td>8.321648e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>chicago.cwlu_womankind.1971.12.03.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>137</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>1970 A A AGGRESS ALL AND ANOTHER AS BE BEFORE ...</td>\n",
       "      <td>0.004322</td>\n",
       "      <td>...</td>\n",
       "      <td>5.850770e-04</td>\n",
       "      <td>1.537002e-03</td>\n",
       "      <td>0.012361</td>\n",
       "      <td>0.001001</td>\n",
       "      <td>8.044500e-02</td>\n",
       "      <td>2.507706e-03</td>\n",
       "      <td>0.000997</td>\n",
       "      <td>5.242713e-03</td>\n",
       "      <td>3.255564e-04</td>\n",
       "      <td>1.522500e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.61_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>717</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1914 1915 Abbott Addams Addams Addams Aletta A...</td>\n",
       "      <td>0.000097</td>\n",
       "      <td>...</td>\n",
       "      <td>2.806775e-01</td>\n",
       "      <td>1.368944e-04</td>\n",
       "      <td>0.014723</td>\n",
       "      <td>0.001854</td>\n",
       "      <td>1.447190e-01</td>\n",
       "      <td>2.125971e-04</td>\n",
       "      <td>0.000056</td>\n",
       "      <td>4.482416e-02</td>\n",
       "      <td>1.826577e-03</td>\n",
       "      <td>7.061188e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>chicago.cwlu_womankind.1973.09.04.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>878</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>25</td>\n",
       "      <td>2</td>\n",
       "      <td>100000 10th 13 160 1950s 1972 1972 20hour 21 2...</td>\n",
       "      <td>0.000280</td>\n",
       "      <td>...</td>\n",
       "      <td>5.606136e-04</td>\n",
       "      <td>5.961911e-04</td>\n",
       "      <td>0.006484</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>6.439716e-02</td>\n",
       "      <td>3.438558e-04</td>\n",
       "      <td>0.006632</td>\n",
       "      <td>1.389610e-03</td>\n",
       "      <td>1.649188e-03</td>\n",
       "      <td>9.796699e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>chicago.cwlu_womankind.1973.02.15.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>854</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>26</td>\n",
       "      <td>2</td>\n",
       "      <td>1 18611939 1880s 1902 1902 1904 1906 1915 1915...</td>\n",
       "      <td>0.000350</td>\n",
       "      <td>...</td>\n",
       "      <td>4.288340e-04</td>\n",
       "      <td>3.416225e-03</td>\n",
       "      <td>0.005898</td>\n",
       "      <td>0.000014</td>\n",
       "      <td>1.260788e-03</td>\n",
       "      <td>1.339530e-06</td>\n",
       "      <td>0.000154</td>\n",
       "      <td>1.464510e-03</td>\n",
       "      <td>6.788238e-04</td>\n",
       "      <td>1.239667e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>nyc.redstockings.1973.willis.conservatismofms-...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1154</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>27</td>\n",
       "      <td>2</td>\n",
       "      <td>171 1970 A Alpert Alpert Alpert Alpert Alperts...</td>\n",
       "      <td>0.000041</td>\n",
       "      <td>...</td>\n",
       "      <td>9.717591e-07</td>\n",
       "      <td>1.171223e-03</td>\n",
       "      <td>0.001854</td>\n",
       "      <td>0.001790</td>\n",
       "      <td>4.518394e-04</td>\n",
       "      <td>2.228438e-05</td>\n",
       "      <td>0.505957</td>\n",
       "      <td>1.775311e-01</td>\n",
       "      <td>4.629744e-03</td>\n",
       "      <td>1.061444e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>nyc.redstockings.1973.serre.psychologique-4.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>318</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1 197 1971 20 3 3 5 65 And As Be Brule But Fem...</td>\n",
       "      <td>0.001685</td>\n",
       "      <td>...</td>\n",
       "      <td>1.128922e-05</td>\n",
       "      <td>2.302630e-03</td>\n",
       "      <td>0.002148</td>\n",
       "      <td>0.001033</td>\n",
       "      <td>1.555321e-03</td>\n",
       "      <td>1.895100e-05</td>\n",
       "      <td>0.005081</td>\n",
       "      <td>5.655665e-01</td>\n",
       "      <td>6.627563e-03</td>\n",
       "      <td>5.455298e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.60_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>699</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>29</td>\n",
       "      <td>1</td>\n",
       "      <td>12060 130 1915 30 A Advancement Art Associatio...</td>\n",
       "      <td>0.000011</td>\n",
       "      <td>...</td>\n",
       "      <td>6.622268e-01</td>\n",
       "      <td>2.357433e-04</td>\n",
       "      <td>0.024198</td>\n",
       "      <td>0.000079</td>\n",
       "      <td>1.114799e-04</td>\n",
       "      <td>1.321053e-07</td>\n",
       "      <td>0.000005</td>\n",
       "      <td>1.624036e-04</td>\n",
       "      <td>6.946475e-05</td>\n",
       "      <td>9.608991e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.43_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>362</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>30</td>\n",
       "      <td>1</td>\n",
       "      <td>A A Al Augusta Beauty Bowen Bowen Childrens Ch...</td>\n",
       "      <td>0.000012</td>\n",
       "      <td>...</td>\n",
       "      <td>3.559083e-04</td>\n",
       "      <td>9.035185e-05</td>\n",
       "      <td>0.000271</td>\n",
       "      <td>0.000169</td>\n",
       "      <td>6.335339e-07</td>\n",
       "      <td>7.866851e-07</td>\n",
       "      <td>0.000024</td>\n",
       "      <td>2.315143e-05</td>\n",
       "      <td>2.070584e-05</td>\n",
       "      <td>2.168287e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>989</th>\n",
       "      <td>notessecondyear_8.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>1023</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>994</td>\n",
       "      <td>2</td>\n",
       "      <td>A A American And And And Applicant But But But...</td>\n",
       "      <td>0.014345</td>\n",
       "      <td>...</td>\n",
       "      <td>6.558079e-07</td>\n",
       "      <td>2.729892e-04</td>\n",
       "      <td>0.000156</td>\n",
       "      <td>0.811449</td>\n",
       "      <td>3.039084e-04</td>\n",
       "      <td>7.061955e-02</td>\n",
       "      <td>0.000956</td>\n",
       "      <td>1.197361e-03</td>\n",
       "      <td>5.505902e-04</td>\n",
       "      <td>1.464764e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>990</th>\n",
       "      <td>chicago.hullhouse_bulletin.1906.09.47_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1906</td>\n",
       "      <td>730</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>995</td>\n",
       "      <td>1</td>\n",
       "      <td>11 111 11th 1907 1907 19th 1st 5 5 7 7th 830 9...</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>...</td>\n",
       "      <td>4.055088e-01</td>\n",
       "      <td>1.324791e-05</td>\n",
       "      <td>0.003229</td>\n",
       "      <td>0.000002</td>\n",
       "      <td>4.467826e-05</td>\n",
       "      <td>2.326206e-07</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>8.367136e-06</td>\n",
       "      <td>1.962632e-05</td>\n",
       "      <td>2.693854e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>991</th>\n",
       "      <td>chicago.hullhouse_bulletin.1901.05.10_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1901</td>\n",
       "      <td>451</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>996</td>\n",
       "      <td>1</td>\n",
       "      <td>A AND ARTS Afternoons And Arts Bohemian Buildi...</td>\n",
       "      <td>0.000148</td>\n",
       "      <td>...</td>\n",
       "      <td>2.121965e-03</td>\n",
       "      <td>2.842506e-04</td>\n",
       "      <td>0.001848</td>\n",
       "      <td>0.000016</td>\n",
       "      <td>1.227260e-05</td>\n",
       "      <td>2.235669e-05</td>\n",
       "      <td>0.000021</td>\n",
       "      <td>8.694560e-05</td>\n",
       "      <td>3.335596e-04</td>\n",
       "      <td>3.654254e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>992</th>\n",
       "      <td>nyc.masses_1914.03.07.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1914</td>\n",
       "      <td>212</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>997</td>\n",
       "      <td>1</td>\n",
       "      <td>600000 A A ARE Ahout All And B Being COMPARATI...</td>\n",
       "      <td>0.002526</td>\n",
       "      <td>...</td>\n",
       "      <td>1.316634e-04</td>\n",
       "      <td>4.668155e-04</td>\n",
       "      <td>0.051159</td>\n",
       "      <td>0.008883</td>\n",
       "      <td>7.818998e-05</td>\n",
       "      <td>2.225525e-05</td>\n",
       "      <td>0.006670</td>\n",
       "      <td>6.836366e-03</td>\n",
       "      <td>8.351914e-03</td>\n",
       "      <td>4.198621e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>chicago.hullhouse_bulletin.1913.01.28_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1913</td>\n",
       "      <td>352</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>998</td>\n",
       "      <td>1</td>\n",
       "      <td>A An An An As At Balaleika Balaleika Balls Bow...</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>...</td>\n",
       "      <td>4.027352e-03</td>\n",
       "      <td>2.144072e-04</td>\n",
       "      <td>0.000901</td>\n",
       "      <td>0.000144</td>\n",
       "      <td>5.739224e-06</td>\n",
       "      <td>5.847998e-07</td>\n",
       "      <td>0.000042</td>\n",
       "      <td>5.731538e-05</td>\n",
       "      <td>3.748539e-05</td>\n",
       "      <td>5.592114e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.17.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>819</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>999</td>\n",
       "      <td>2</td>\n",
       "      <td>13 6 A AFTER All And And And And Any BabysitAn...</td>\n",
       "      <td>0.028825</td>\n",
       "      <td>...</td>\n",
       "      <td>1.873163e-04</td>\n",
       "      <td>1.812981e-03</td>\n",
       "      <td>0.003037</td>\n",
       "      <td>0.000330</td>\n",
       "      <td>7.705729e-05</td>\n",
       "      <td>1.447279e-03</td>\n",
       "      <td>0.000282</td>\n",
       "      <td>6.025183e-05</td>\n",
       "      <td>1.193941e-04</td>\n",
       "      <td>1.733746e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>chicago.hullhouse_bulletin.1900.08.13_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1900</td>\n",
       "      <td>908</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "      <td>26th 7th 7th 7th AND Addams Association Associ...</td>\n",
       "      <td>0.000016</td>\n",
       "      <td>...</td>\n",
       "      <td>7.898147e-03</td>\n",
       "      <td>3.682620e-05</td>\n",
       "      <td>0.004368</td>\n",
       "      <td>0.000032</td>\n",
       "      <td>7.560749e-06</td>\n",
       "      <td>4.668949e-06</td>\n",
       "      <td>0.000103</td>\n",
       "      <td>1.634232e-05</td>\n",
       "      <td>9.343600e-05</td>\n",
       "      <td>1.591624e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>nyc.redstockings.1973.brooke.sexroletheory-1.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>965</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1001</td>\n",
       "      <td>2</td>\n",
       "      <td>According Although And Anne Are As Betty Blami...</td>\n",
       "      <td>0.000222</td>\n",
       "      <td>...</td>\n",
       "      <td>2.777913e-05</td>\n",
       "      <td>3.203729e-03</td>\n",
       "      <td>0.001098</td>\n",
       "      <td>0.003309</td>\n",
       "      <td>1.128253e-05</td>\n",
       "      <td>3.136667e-06</td>\n",
       "      <td>0.000584</td>\n",
       "      <td>2.718902e-03</td>\n",
       "      <td>6.211473e-03</td>\n",
       "      <td>2.163617e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>nyc.redstockings.1973.price.keepingwomenout-05...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>261</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1002</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1000 1000 1010 1010 1020 1050 1900 1940 1960...</td>\n",
       "      <td>0.000098</td>\n",
       "      <td>...</td>\n",
       "      <td>1.273454e-04</td>\n",
       "      <td>8.217370e-01</td>\n",
       "      <td>0.054020</td>\n",
       "      <td>0.000253</td>\n",
       "      <td>2.088033e-06</td>\n",
       "      <td>6.038040e-06</td>\n",
       "      <td>0.002684</td>\n",
       "      <td>1.822640e-03</td>\n",
       "      <td>1.973596e-02</td>\n",
       "      <td>2.097033e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>chicago.cwlu_womankind.1972.07.12.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1748</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1003</td>\n",
       "      <td>2</td>\n",
       "      <td>1 10 10 14 15 15 16 1970 2 2 29 3 34 37270 4 4...</td>\n",
       "      <td>0.000403</td>\n",
       "      <td>...</td>\n",
       "      <td>2.466957e-05</td>\n",
       "      <td>2.865772e-04</td>\n",
       "      <td>0.001003</td>\n",
       "      <td>0.001620</td>\n",
       "      <td>3.892161e-04</td>\n",
       "      <td>3.514173e-04</td>\n",
       "      <td>0.000185</td>\n",
       "      <td>8.381261e-04</td>\n",
       "      <td>3.930025e-05</td>\n",
       "      <td>3.989102e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>notessecondyear_43.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>895</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1004</td>\n",
       "      <td>2</td>\n",
       "      <td>10 A Adult As At Because Gradually He Hindu Hi...</td>\n",
       "      <td>0.000143</td>\n",
       "      <td>...</td>\n",
       "      <td>9.695230e-06</td>\n",
       "      <td>4.796797e-03</td>\n",
       "      <td>0.000335</td>\n",
       "      <td>0.001512</td>\n",
       "      <td>4.415799e-06</td>\n",
       "      <td>1.441087e-05</td>\n",
       "      <td>0.000090</td>\n",
       "      <td>4.306749e-04</td>\n",
       "      <td>5.464089e-03</td>\n",
       "      <td>1.342972e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1000</th>\n",
       "      <td>nyc.masses_1915.11.08.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1915</td>\n",
       "      <td>1372</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>1005</td>\n",
       "      <td>1</td>\n",
       "      <td>And And And And Because But But Do For I I I I...</td>\n",
       "      <td>0.003817</td>\n",
       "      <td>...</td>\n",
       "      <td>2.949473e-05</td>\n",
       "      <td>7.471734e-04</td>\n",
       "      <td>0.003831</td>\n",
       "      <td>0.358347</td>\n",
       "      <td>7.529177e-05</td>\n",
       "      <td>7.949544e-06</td>\n",
       "      <td>0.000391</td>\n",
       "      <td>4.667414e-03</td>\n",
       "      <td>1.149062e-02</td>\n",
       "      <td>5.658331e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1001</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>417</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1006</td>\n",
       "      <td>2</td>\n",
       "      <td>1968 1970 1970 AND Above America Among Atlanti...</td>\n",
       "      <td>0.001615</td>\n",
       "      <td>...</td>\n",
       "      <td>5.654861e-06</td>\n",
       "      <td>4.122189e-03</td>\n",
       "      <td>0.003647</td>\n",
       "      <td>0.001637</td>\n",
       "      <td>6.493332e-05</td>\n",
       "      <td>4.460301e-06</td>\n",
       "      <td>0.005652</td>\n",
       "      <td>6.412765e-02</td>\n",
       "      <td>1.336864e-02</td>\n",
       "      <td>6.239278e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.03.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>691</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1007</td>\n",
       "      <td>2</td>\n",
       "      <td>3 37 9 A Act And Association August British Bu...</td>\n",
       "      <td>0.012503</td>\n",
       "      <td>...</td>\n",
       "      <td>8.612393e-04</td>\n",
       "      <td>2.293897e-03</td>\n",
       "      <td>0.066242</td>\n",
       "      <td>0.000290</td>\n",
       "      <td>3.431798e-02</td>\n",
       "      <td>1.842017e-04</td>\n",
       "      <td>0.000269</td>\n",
       "      <td>3.375430e-03</td>\n",
       "      <td>1.117519e-03</td>\n",
       "      <td>9.198406e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1003</th>\n",
       "      <td>nyc.redstockings.1973.leon.dirtytricks-03.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1208</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1008</td>\n",
       "      <td>2</td>\n",
       "      <td>15year 1953 1957 195960 195962 1965 1965 1967 ...</td>\n",
       "      <td>0.000022</td>\n",
       "      <td>...</td>\n",
       "      <td>1.861875e-05</td>\n",
       "      <td>3.236717e-02</td>\n",
       "      <td>0.001620</td>\n",
       "      <td>0.000176</td>\n",
       "      <td>3.638160e-05</td>\n",
       "      <td>2.399494e-07</td>\n",
       "      <td>0.000984</td>\n",
       "      <td>1.760869e-03</td>\n",
       "      <td>5.848419e-04</td>\n",
       "      <td>2.864224e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1004</th>\n",
       "      <td>chicago.cwlu_womankind.1973.02.11.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1912</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1009</td>\n",
       "      <td>2</td>\n",
       "      <td>170 19 197172 1972 1972 1972 1972 1973 20 200 ...</td>\n",
       "      <td>0.000629</td>\n",
       "      <td>...</td>\n",
       "      <td>2.376424e-04</td>\n",
       "      <td>1.958426e-03</td>\n",
       "      <td>0.170557</td>\n",
       "      <td>0.000004</td>\n",
       "      <td>5.263562e-05</td>\n",
       "      <td>1.127680e-07</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>3.218346e-04</td>\n",
       "      <td>9.651502e-06</td>\n",
       "      <td>2.077392e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1005</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1218</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1010</td>\n",
       "      <td>2</td>\n",
       "      <td>100 1959 Anthony Anthony Anthony Anthony Antho...</td>\n",
       "      <td>0.000002</td>\n",
       "      <td>...</td>\n",
       "      <td>3.221318e-06</td>\n",
       "      <td>4.119853e-04</td>\n",
       "      <td>0.000180</td>\n",
       "      <td>0.000063</td>\n",
       "      <td>3.385272e-06</td>\n",
       "      <td>1.105109e-06</td>\n",
       "      <td>0.000386</td>\n",
       "      <td>8.059869e-04</td>\n",
       "      <td>9.635969e-01</td>\n",
       "      <td>1.008740e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1006</th>\n",
       "      <td>chicago.cwlu_womankind.1972.02.22.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>599</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1011</td>\n",
       "      <td>2</td>\n",
       "      <td>100 12 1250 180 1910 1910 1969 1970 1970 2 2 2...</td>\n",
       "      <td>0.025898</td>\n",
       "      <td>...</td>\n",
       "      <td>3.474708e-04</td>\n",
       "      <td>4.011134e-03</td>\n",
       "      <td>0.052740</td>\n",
       "      <td>0.000046</td>\n",
       "      <td>1.248565e-03</td>\n",
       "      <td>1.089900e-04</td>\n",
       "      <td>0.000069</td>\n",
       "      <td>5.165846e-04</td>\n",
       "      <td>4.116590e-05</td>\n",
       "      <td>1.779840e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1007</th>\n",
       "      <td>chicago.hullhouse_bulletin.1903.01.18_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1903</td>\n",
       "      <td>750</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1012</td>\n",
       "      <td>1</td>\n",
       "      <td>11 12 1903 5 7 8 9 A AJAX APARTMENTS Achilles ...</td>\n",
       "      <td>0.000265</td>\n",
       "      <td>...</td>\n",
       "      <td>4.146514e-04</td>\n",
       "      <td>8.488862e-06</td>\n",
       "      <td>0.000068</td>\n",
       "      <td>0.001614</td>\n",
       "      <td>1.174057e-05</td>\n",
       "      <td>1.035402e-05</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>5.741842e-05</td>\n",
       "      <td>3.020070e-04</td>\n",
       "      <td>1.189586e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1008</th>\n",
       "      <td>chicago.cwlu_womankind.1973.09.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1158</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1013</td>\n",
       "      <td>2</td>\n",
       "      <td>1000year 12 2 212 23 5000 60s Action Again Age...</td>\n",
       "      <td>0.000200</td>\n",
       "      <td>...</td>\n",
       "      <td>1.253247e-04</td>\n",
       "      <td>2.185908e-03</td>\n",
       "      <td>0.051929</td>\n",
       "      <td>0.000055</td>\n",
       "      <td>4.996564e-04</td>\n",
       "      <td>2.361557e-07</td>\n",
       "      <td>0.000039</td>\n",
       "      <td>2.244743e-02</td>\n",
       "      <td>8.935311e-05</td>\n",
       "      <td>1.914580e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1009</th>\n",
       "      <td>chicago.cwlu_womankind.1972.07.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>46</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1014</td>\n",
       "      <td>2</td>\n",
       "      <td>1 10 11 11 14 1972 25 8 Chicago JULY Liberatio...</td>\n",
       "      <td>0.000509</td>\n",
       "      <td>...</td>\n",
       "      <td>6.366443e-04</td>\n",
       "      <td>6.069170e-04</td>\n",
       "      <td>0.011148</td>\n",
       "      <td>0.000157</td>\n",
       "      <td>1.829409e-01</td>\n",
       "      <td>7.312474e-04</td>\n",
       "      <td>0.002165</td>\n",
       "      <td>1.118205e-02</td>\n",
       "      <td>1.787384e-03</td>\n",
       "      <td>2.538137e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1010</th>\n",
       "      <td>nyc.redstockings.1973.price.keepingwomenout-11...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>815</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1015</td>\n",
       "      <td>2</td>\n",
       "      <td>100000 10786 131 1380000year 1500000year 15000...</td>\n",
       "      <td>0.000023</td>\n",
       "      <td>...</td>\n",
       "      <td>1.864383e-06</td>\n",
       "      <td>9.528454e-01</td>\n",
       "      <td>0.001478</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>4.428123e-08</td>\n",
       "      <td>7.758343e-08</td>\n",
       "      <td>0.000148</td>\n",
       "      <td>4.066575e-04</td>\n",
       "      <td>2.740341e-04</td>\n",
       "      <td>4.268418e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1011</th>\n",
       "      <td>notessecondyear_18.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>1059</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1016</td>\n",
       "      <td>2</td>\n",
       "      <td>2 And And But But But But For He His I I In In...</td>\n",
       "      <td>0.003972</td>\n",
       "      <td>...</td>\n",
       "      <td>7.336830e-07</td>\n",
       "      <td>4.029945e-02</td>\n",
       "      <td>0.000208</td>\n",
       "      <td>0.142684</td>\n",
       "      <td>1.241897e-06</td>\n",
       "      <td>4.875436e-07</td>\n",
       "      <td>0.000116</td>\n",
       "      <td>5.754603e-03</td>\n",
       "      <td>1.942943e-03</td>\n",
       "      <td>6.965635e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1012</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1142</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1017</td>\n",
       "      <td>2</td>\n",
       "      <td>2 A A A A All And And And As As As As Because ...</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>...</td>\n",
       "      <td>2.270889e-06</td>\n",
       "      <td>3.383372e-04</td>\n",
       "      <td>0.000702</td>\n",
       "      <td>0.000085</td>\n",
       "      <td>1.426757e-06</td>\n",
       "      <td>1.483102e-07</td>\n",
       "      <td>0.000668</td>\n",
       "      <td>3.444811e-03</td>\n",
       "      <td>4.705313e-01</td>\n",
       "      <td>2.651383e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <td>nyc.masses_1915.11.09.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1915</td>\n",
       "      <td>1643</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>1018</td>\n",
       "      <td>1</td>\n",
       "      <td>A A A A A A A Against Ah Alice All Allies And ...</td>\n",
       "      <td>0.001734</td>\n",
       "      <td>...</td>\n",
       "      <td>9.533243e-04</td>\n",
       "      <td>3.595905e-04</td>\n",
       "      <td>0.018908</td>\n",
       "      <td>0.006327</td>\n",
       "      <td>5.120805e-03</td>\n",
       "      <td>6.866228e-05</td>\n",
       "      <td>0.000109</td>\n",
       "      <td>1.598283e-02</td>\n",
       "      <td>7.800131e-03</td>\n",
       "      <td>1.178860e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1014</th>\n",
       "      <td>chicago.hullhouse_bulletin.1910.05.36_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1910</td>\n",
       "      <td>270</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1019</td>\n",
       "      <td>1</td>\n",
       "      <td>A Another Association Association Christmas Ch...</td>\n",
       "      <td>0.000012</td>\n",
       "      <td>...</td>\n",
       "      <td>7.797042e-04</td>\n",
       "      <td>7.196264e-05</td>\n",
       "      <td>0.000640</td>\n",
       "      <td>0.000161</td>\n",
       "      <td>1.775221e-06</td>\n",
       "      <td>8.746161e-07</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>3.409313e-05</td>\n",
       "      <td>4.558465e-05</td>\n",
       "      <td>4.475873e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1015</th>\n",
       "      <td>chicago.hullhouse_bulletin.1905.08.22_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1905</td>\n",
       "      <td>845</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1020</td>\n",
       "      <td>1</td>\n",
       "      <td>100 106 1101tionse 1896 1897 1c6 1st 43 A Aid ...</td>\n",
       "      <td>0.000055</td>\n",
       "      <td>...</td>\n",
       "      <td>4.720976e-02</td>\n",
       "      <td>3.561534e-04</td>\n",
       "      <td>0.001372</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>1.206954e-06</td>\n",
       "      <td>1.257704e-04</td>\n",
       "      <td>0.000014</td>\n",
       "      <td>9.285064e-06</td>\n",
       "      <td>2.353426e-05</td>\n",
       "      <td>8.135017e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1016</th>\n",
       "      <td>chicago.cwlu_womankind.1972.04.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>20</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1021</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1972 8 April Home Womankind and girls is no ...</td>\n",
       "      <td>0.004198</td>\n",
       "      <td>...</td>\n",
       "      <td>1.415338e-03</td>\n",
       "      <td>4.366391e-03</td>\n",
       "      <td>0.012436</td>\n",
       "      <td>0.000607</td>\n",
       "      <td>5.722801e-03</td>\n",
       "      <td>6.286530e-04</td>\n",
       "      <td>0.001987</td>\n",
       "      <td>2.100345e-03</td>\n",
       "      <td>4.512580e-04</td>\n",
       "      <td>5.048902e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1017</th>\n",
       "      <td>chicago.cwlu_womankind.1971.12.14.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>1195</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1022</td>\n",
       "      <td>2</td>\n",
       "      <td>95 Adn And And And Answering At At Because Bei...</td>\n",
       "      <td>0.002868</td>\n",
       "      <td>...</td>\n",
       "      <td>7.288985e-05</td>\n",
       "      <td>7.861063e-04</td>\n",
       "      <td>0.002966</td>\n",
       "      <td>0.000116</td>\n",
       "      <td>7.850430e-04</td>\n",
       "      <td>3.332340e-05</td>\n",
       "      <td>0.000449</td>\n",
       "      <td>1.937276e-03</td>\n",
       "      <td>1.824477e-04</td>\n",
       "      <td>1.278577e-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018</th>\n",
       "      <td>nyc.redstockings.1973.leon.conditioningline-3.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1119</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1023</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1970 1971 2 3 4 5 6 A A A A According Aug Br...</td>\n",
       "      <td>0.000369</td>\n",
       "      <td>...</td>\n",
       "      <td>2.711413e-05</td>\n",
       "      <td>4.802085e-03</td>\n",
       "      <td>0.001749</td>\n",
       "      <td>0.005094</td>\n",
       "      <td>9.978111e-06</td>\n",
       "      <td>2.281335e-06</td>\n",
       "      <td>0.000365</td>\n",
       "      <td>3.029369e-03</td>\n",
       "      <td>2.114945e-03</td>\n",
       "      <td>2.491901e-06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1018 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    doc     city  \\\n",
       "1                                notessecondyear_70.txt      nyc   \n",
       "2                 chicago.cwlu_womankind.1971.11.06.txt  chicago   \n",
       "3                             nyc.masses_1916.04.21.txt      nyc   \n",
       "4     nyc.redstockings.1973.mainardi.marriagequestio...      nyc   \n",
       "5                 chicago.cwlu_womankind.1972.01.01.txt  chicago   \n",
       "6                                 notesfirstyear_30.txt      nyc   \n",
       "7                 chicago.cwlu_womankind.1972.05.14.txt  chicago   \n",
       "8     nyc.redstockings.1973.sarachild.programforcons...      nyc   \n",
       "9                 chicago.cwlu_womankind.1972.11.11.txt  chicago   \n",
       "10                chicago.cwlu_womankind.1972.03.20.txt  chicago   \n",
       "11                chicago.cwlu_womankind.1972.01.12.txt  chicago   \n",
       "12                chicago.cwlu_womankind.1972.09.06.txt  chicago   \n",
       "13                chicago.cwlu_womankind.1973.04.16.txt  chicago   \n",
       "14                chicago.cwlu_womankind.1972.12.08.txt  chicago   \n",
       "15                chicago.cwlu_womankind.1973.11.06.txt  chicago   \n",
       "16                chicago.cwlu_womankind.1973.06.08.txt  chicago   \n",
       "17                chicago.cwlu_womankind.1972.06.13.txt  chicago   \n",
       "18                chicago.cwlu_womankind.1972.12.05.txt  chicago   \n",
       "19    nyc.redstockings.1973.hanisch.mensliberation-3...      nyc   \n",
       "20    nyc.redstockings.1973.hanisch.workingconscious...      nyc   \n",
       "21                               notessecondyear_25.txt      nyc   \n",
       "22    nyc.redstockings.1973.sarachild.whatwereallywa...      nyc   \n",
       "23                chicago.cwlu_womankind.1971.12.03.txt  chicago   \n",
       "24    chicago.hullhouse_bulletin.1916.01.61_article.txt  chicago   \n",
       "25                chicago.cwlu_womankind.1973.09.04.txt  chicago   \n",
       "26                chicago.cwlu_womankind.1973.02.15.txt  chicago   \n",
       "27    nyc.redstockings.1973.willis.conservatismofms-...      nyc   \n",
       "28      nyc.redstockings.1973.serre.psychologique-4.txt      nyc   \n",
       "29    chicago.hullhouse_bulletin.1916.01.60_article.txt  chicago   \n",
       "30    chicago.hullhouse_bulletin.1916.01.43_article.txt  chicago   \n",
       "...                                                 ...      ...   \n",
       "989                               notessecondyear_8.txt      nyc   \n",
       "990   chicago.hullhouse_bulletin.1906.09.47_article.txt  chicago   \n",
       "991   chicago.hullhouse_bulletin.1901.05.10_article.txt  chicago   \n",
       "992                           nyc.masses_1914.03.07.txt      nyc   \n",
       "993   chicago.hullhouse_bulletin.1913.01.28_article.txt  chicago   \n",
       "994               chicago.cwlu_womankind.1972.03.17.txt  chicago   \n",
       "995   chicago.hullhouse_bulletin.1900.08.13_article.txt  chicago   \n",
       "996    nyc.redstockings.1973.brooke.sexroletheory-1.txt      nyc   \n",
       "997   nyc.redstockings.1973.price.keepingwomenout-05...      nyc   \n",
       "998               chicago.cwlu_womankind.1972.07.12.txt  chicago   \n",
       "999                              notessecondyear_43.txt      nyc   \n",
       "1000                          nyc.masses_1915.11.08.txt      nyc   \n",
       "1001  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1002              chicago.cwlu_womankind.1972.03.03.txt  chicago   \n",
       "1003      nyc.redstockings.1973.leon.dirtytricks-03.txt      nyc   \n",
       "1004              chicago.cwlu_womankind.1973.02.11.txt  chicago   \n",
       "1005  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1006              chicago.cwlu_womankind.1972.02.22.txt  chicago   \n",
       "1007  chicago.hullhouse_bulletin.1903.01.18_article.txt  chicago   \n",
       "1008              chicago.cwlu_womankind.1973.09.06.txt  chicago   \n",
       "1009              chicago.cwlu_womankind.1972.07.01.txt  chicago   \n",
       "1010  nyc.redstockings.1973.price.keepingwomenout-11...      nyc   \n",
       "1011                             notessecondyear_18.txt      nyc   \n",
       "1012  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1013                          nyc.masses_1915.11.09.txt      nyc   \n",
       "1014  chicago.hullhouse_bulletin.1910.05.36_article.txt  chicago   \n",
       "1015  chicago.hullhouse_bulletin.1905.08.22_article.txt  chicago   \n",
       "1016              chicago.cwlu_womankind.1972.04.01.txt  chicago   \n",
       "1017              chicago.cwlu_womankind.1971.12.14.txt  chicago   \n",
       "1018  nyc.redstockings.1973.leon.conditioningline-3.txt      nyc   \n",
       "\n",
       "             publication  date  word_count           org  identifier  wave  \\\n",
       "1        notessecondyear  1969         553  redstockings           1     2   \n",
       "2         cwlu_womankind  1971         890          cwlu           2     2   \n",
       "3                 masses  1916         425    heterodoxy           3     1   \n",
       "4           redstockings  1973         972  redstockings           4     2   \n",
       "5         cwlu_womankind  1972          39          cwlu           5     2   \n",
       "6         notesfirstyear  1968         442  redstockings           6     2   \n",
       "7         cwlu_womankind  1972         976          cwlu           7     2   \n",
       "8           redstockings  1973         785  redstockings           8     2   \n",
       "9         cwlu_womankind  1972         985          cwlu           9     2   \n",
       "10        cwlu_womankind  1972         369          cwlu          10     2   \n",
       "11        cwlu_womankind  1972         938          cwlu          11     2   \n",
       "12        cwlu_womankind  1972        1591          cwlu          12     2   \n",
       "13        cwlu_womankind  1973         689          cwlu          13     2   \n",
       "14        cwlu_womankind  1972        1130          cwlu          14     2   \n",
       "15        cwlu_womankind  1973         899          cwlu          15     2   \n",
       "16        cwlu_womankind  1973        1106          cwlu          16     2   \n",
       "17        cwlu_womankind  1972        1573          cwlu          17     2   \n",
       "18        cwlu_womankind  1972        1439          cwlu          18     2   \n",
       "19          redstockings  1973        1094  redstockings          19     2   \n",
       "20          redstockings  1973        1011  redstockings          20     2   \n",
       "21       notessecondyear  1969         974  redstockings          21     2   \n",
       "22          redstockings  1973         959  redstockings          22     2   \n",
       "23        cwlu_womankind  1971         137          cwlu          23     2   \n",
       "24    hullhouse_bulletin  1916         717     hullhouse          24     1   \n",
       "25        cwlu_womankind  1973         878          cwlu          25     2   \n",
       "26        cwlu_womankind  1973         854          cwlu          26     2   \n",
       "27          redstockings  1973        1154  redstockings          27     2   \n",
       "28          redstockings  1973         318  redstockings          28     2   \n",
       "29    hullhouse_bulletin  1916         699     hullhouse          29     1   \n",
       "30    hullhouse_bulletin  1916         362     hullhouse          30     1   \n",
       "...                  ...   ...         ...           ...         ...   ...   \n",
       "989      notessecondyear  1969        1023  redstockings         994     2   \n",
       "990   hullhouse_bulletin  1906         730     hullhouse         995     1   \n",
       "991   hullhouse_bulletin  1901         451     hullhouse         996     1   \n",
       "992               masses  1914         212    heterodoxy         997     1   \n",
       "993   hullhouse_bulletin  1913         352     hullhouse         998     1   \n",
       "994       cwlu_womankind  1972         819          cwlu         999     2   \n",
       "995   hullhouse_bulletin  1900         908     hullhouse        1000     1   \n",
       "996         redstockings  1973         965  redstockings        1001     2   \n",
       "997         redstockings  1973         261  redstockings        1002     2   \n",
       "998       cwlu_womankind  1972        1748          cwlu        1003     2   \n",
       "999      notessecondyear  1969         895  redstockings        1004     2   \n",
       "1000              masses  1915        1372    heterodoxy        1005     1   \n",
       "1001        redstockings  1973         417  redstockings        1006     2   \n",
       "1002      cwlu_womankind  1972         691          cwlu        1007     2   \n",
       "1003        redstockings  1973        1208  redstockings        1008     2   \n",
       "1004      cwlu_womankind  1973        1912          cwlu        1009     2   \n",
       "1005        redstockings  1973        1218  redstockings        1010     2   \n",
       "1006      cwlu_womankind  1972         599          cwlu        1011     2   \n",
       "1007  hullhouse_bulletin  1903         750     hullhouse        1012     1   \n",
       "1008      cwlu_womankind  1973        1158          cwlu        1013     2   \n",
       "1009      cwlu_womankind  1972          46          cwlu        1014     2   \n",
       "1010        redstockings  1973         815  redstockings        1015     2   \n",
       "1011     notessecondyear  1969        1059  redstockings        1016     2   \n",
       "1012        redstockings  1973        1142  redstockings        1017     2   \n",
       "1013              masses  1915        1643    heterodoxy        1018     1   \n",
       "1014  hullhouse_bulletin  1910         270     hullhouse        1019     1   \n",
       "1015  hullhouse_bulletin  1905         845     hullhouse        1020     1   \n",
       "1016      cwlu_womankind  1972          20          cwlu        1021     2   \n",
       "1017      cwlu_womankind  1971        1195          cwlu        1022     2   \n",
       "1018        redstockings  1973        1119  redstockings        1023     2   \n",
       "\n",
       "                                            text_string        X1  \\\n",
       "1     1 1 1 1 1 10 11 2 2 2 2 3 3 3 4 5 6 7 8 9 A An...  0.000009   \n",
       "2     411 93 Actually Alice American American Any As...  0.005051   \n",
       "3     All Anarchist Anarchist And Birth Birth Birth ...  0.000005   \n",
       "4     1968 1968 50s 60s Although Although American A...  0.000610   \n",
       "5     1972 5 Ghots I January Womankind a bind by cro...  0.000417   \n",
       "6     12 12 15 1868 1868 1968 28 A AUNT All Anybody ...  0.000198   \n",
       "7     1 1970 2 2 3 4 4 5 6 7 8 A AT Also Also Amer A...  0.002649   \n",
       "8     1 2 3 A A A APPENDIX And CONSCIOUSNESSRAISING ...  0.000005   \n",
       "9     1867 1972 A AND ARTICLES Adopt Affiar All Amaz...  0.000512   \n",
       "10    Above CWLU CWLU CWLU Chicago Chicago Discus Li...  0.000227   \n",
       "11    10 12volt 15 6volt 6volt 6volt A A Another As ...  0.609806   \n",
       "12    1930s 1930s 1auqhter 30s 5th 6th 7871786 A Act...  0.002045   \n",
       "13    25 29 2ND 4 656 A Any Any April As Barry But C...  0.003974   \n",
       "14    10 1020 1161 12 1300s 1500 1793 1930s 1950s 19...  0.000046   \n",
       "15    14 30 ABORTION AFLCIO AFLCIO AND Abortions Act...  0.000213   \n",
       "16    112940 1970 1971 1972 1972 1974 26000 3 341840...  0.001097   \n",
       "17    1 1100015000 13 13 1318 18 1800036000 1972 1eV...  0.000165   \n",
       "18    1972 2 2 3 4 438211 552 575 79 852 A A A A A A...  0.001080   \n",
       "19    1000 150 375 6 7 74 And Apparently As As Avenu...  0.027984   \n",
       "20    2 3 3 4 4 Although Being But Do Do FROM Gradua...  0.000223   \n",
       "21    1 1 1570 2 3 369 4 5 5 50 6 6 7 7 8 92169 95 A...  0.122361   \n",
       "22    1971 26 All And And And And August Being But C...  0.000858   \n",
       "23    1970 A A AGGRESS ALL AND ANOTHER AS BE BEFORE ...  0.004322   \n",
       "24    1914 1915 Abbott Addams Addams Addams Aletta A...  0.000097   \n",
       "25    100000 10th 13 160 1950s 1972 1972 20hour 21 2...  0.000280   \n",
       "26    1 18611939 1880s 1902 1902 1904 1906 1915 1915...  0.000350   \n",
       "27    171 1970 A Alpert Alpert Alpert Alpert Alperts...  0.000041   \n",
       "28    1 197 1971 20 3 3 5 65 And As Be Brule But Fem...  0.001685   \n",
       "29    12060 130 1915 30 A Advancement Art Associatio...  0.000011   \n",
       "30    A A Al Augusta Beauty Bowen Bowen Childrens Ch...  0.000012   \n",
       "...                                                 ...       ...   \n",
       "989   A A American And And And Applicant But But But...  0.014345   \n",
       "990   11 111 11th 1907 1907 19th 1st 5 5 7 7th 830 9...  0.000003   \n",
       "991   A AND ARTS Afternoons And Arts Bohemian Buildi...  0.000148   \n",
       "992   600000 A A ARE Ahout All And B Being COMPARATI...  0.002526   \n",
       "993   A An An An As At Balaleika Balaleika Balls Bow...  0.000036   \n",
       "994   13 6 A AFTER All And And And And Any BabysitAn...  0.028825   \n",
       "995   26th 7th 7th 7th AND Addams Association Associ...  0.000016   \n",
       "996   According Although And Anne Are As Betty Blami...  0.000222   \n",
       "997   1 1000 1000 1010 1010 1020 1050 1900 1940 1960...  0.000098   \n",
       "998   1 10 10 14 15 15 16 1970 2 2 29 3 34 37270 4 4...  0.000403   \n",
       "999   10 A Adult As At Because Gradually He Hindu Hi...  0.000143   \n",
       "1000  And And And And Because But But Do For I I I I...  0.003817   \n",
       "1001  1968 1970 1970 AND Above America Among Atlanti...  0.001615   \n",
       "1002  3 37 9 A Act And Association August British Bu...  0.012503   \n",
       "1003  15year 1953 1957 195960 195962 1965 1965 1967 ...  0.000022   \n",
       "1004  170 19 197172 1972 1972 1972 1972 1973 20 200 ...  0.000629   \n",
       "1005  100 1959 Anthony Anthony Anthony Anthony Antho...  0.000002   \n",
       "1006  100 12 1250 180 1910 1910 1969 1970 1970 2 2 2...  0.025898   \n",
       "1007  11 12 1903 5 7 8 9 A AJAX APARTMENTS Achilles ...  0.000265   \n",
       "1008  1000year 12 2 212 23 5000 60s Action Again Age...  0.000200   \n",
       "1009  1 10 11 11 14 1972 25 8 Chicago JULY Liberatio...  0.000509   \n",
       "1010  100000 10786 131 1380000year 1500000year 15000...  0.000023   \n",
       "1011  2 And And But But But But For He His I I In In...  0.003972   \n",
       "1012  2 A A A A All And And And As As As As Because ...  0.000003   \n",
       "1013  A A A A A A A Against Ah Alice All Allies And ...  0.001734   \n",
       "1014  A Another Association Association Christmas Ch...  0.000012   \n",
       "1015  100 106 1101tionse 1896 1897 1c6 1st 43 A Aid ...  0.000055   \n",
       "1016  1 1972 8 April Home Womankind and girls is no ...  0.004198   \n",
       "1017  95 Adn And And And Answering At At Because Bei...  0.002868   \n",
       "1018  1 1970 1971 2 3 4 5 6 A A A A According Aug Br...  0.000369   \n",
       "\n",
       "          ...                X31           X32       X33       X34  \\\n",
       "1         ...       9.839842e-07  1.977760e-04  0.000535  0.003818   \n",
       "2         ...       1.174310e-06  9.718309e-04  0.000185  0.000362   \n",
       "3         ...       1.599000e-02  1.085408e-04  0.103392  0.000093   \n",
       "4         ...       5.621154e-05  4.041277e-03  0.001754  0.003146   \n",
       "5         ...       6.662805e-05  5.592332e-04  0.015263  0.000041   \n",
       "6         ...       1.427645e-04  3.033551e-02  0.017309  0.002739   \n",
       "7         ...       1.073067e-05  2.910664e-05  0.000167  0.000471   \n",
       "8         ...       1.564634e-06  1.118471e-04  0.000506  0.001910   \n",
       "9         ...       1.620817e-05  1.483889e-02  0.044229  0.000432   \n",
       "10        ...       2.225532e-05  3.113088e-04  0.002405  0.000890   \n",
       "11        ...       2.187946e-06  2.188952e-04  0.000195  0.000185   \n",
       "12        ...       4.415765e-04  2.306720e-04  0.022810  0.000429   \n",
       "13        ...       1.693714e-04  2.471187e-04  0.003164  0.361153   \n",
       "14        ...       8.282622e-05  3.213172e-07  0.000349  0.000002   \n",
       "15        ...       4.258701e-04  1.257658e-03  0.005907  0.000144   \n",
       "16        ...       1.057206e-03  2.055740e-03  0.007596  0.000127   \n",
       "17        ...       7.044254e-05  1.355798e-03  0.001558  0.000013   \n",
       "18        ...       1.061891e-04  2.028123e-03  0.011778  0.001235   \n",
       "19        ...       1.345389e-04  8.122124e-03  0.052807  0.002893   \n",
       "20        ...       4.107019e-06  4.187583e-04  0.003844  0.001389   \n",
       "21        ...       2.637828e-05  4.774722e-02  0.001849  0.016906   \n",
       "22        ...       6.155151e-07  1.878060e-03  0.000187  0.000909   \n",
       "23        ...       5.850770e-04  1.537002e-03  0.012361  0.001001   \n",
       "24        ...       2.806775e-01  1.368944e-04  0.014723  0.001854   \n",
       "25        ...       5.606136e-04  5.961911e-04  0.006484  0.000036   \n",
       "26        ...       4.288340e-04  3.416225e-03  0.005898  0.000014   \n",
       "27        ...       9.717591e-07  1.171223e-03  0.001854  0.001790   \n",
       "28        ...       1.128922e-05  2.302630e-03  0.002148  0.001033   \n",
       "29        ...       6.622268e-01  2.357433e-04  0.024198  0.000079   \n",
       "30        ...       3.559083e-04  9.035185e-05  0.000271  0.000169   \n",
       "...       ...                ...           ...       ...       ...   \n",
       "989       ...       6.558079e-07  2.729892e-04  0.000156  0.811449   \n",
       "990       ...       4.055088e-01  1.324791e-05  0.003229  0.000002   \n",
       "991       ...       2.121965e-03  2.842506e-04  0.001848  0.000016   \n",
       "992       ...       1.316634e-04  4.668155e-04  0.051159  0.008883   \n",
       "993       ...       4.027352e-03  2.144072e-04  0.000901  0.000144   \n",
       "994       ...       1.873163e-04  1.812981e-03  0.003037  0.000330   \n",
       "995       ...       7.898147e-03  3.682620e-05  0.004368  0.000032   \n",
       "996       ...       2.777913e-05  3.203729e-03  0.001098  0.003309   \n",
       "997       ...       1.273454e-04  8.217370e-01  0.054020  0.000253   \n",
       "998       ...       2.466957e-05  2.865772e-04  0.001003  0.001620   \n",
       "999       ...       9.695230e-06  4.796797e-03  0.000335  0.001512   \n",
       "1000      ...       2.949473e-05  7.471734e-04  0.003831  0.358347   \n",
       "1001      ...       5.654861e-06  4.122189e-03  0.003647  0.001637   \n",
       "1002      ...       8.612393e-04  2.293897e-03  0.066242  0.000290   \n",
       "1003      ...       1.861875e-05  3.236717e-02  0.001620  0.000176   \n",
       "1004      ...       2.376424e-04  1.958426e-03  0.170557  0.000004   \n",
       "1005      ...       3.221318e-06  4.119853e-04  0.000180  0.000063   \n",
       "1006      ...       3.474708e-04  4.011134e-03  0.052740  0.000046   \n",
       "1007      ...       4.146514e-04  8.488862e-06  0.000068  0.001614   \n",
       "1008      ...       1.253247e-04  2.185908e-03  0.051929  0.000055   \n",
       "1009      ...       6.366443e-04  6.069170e-04  0.011148  0.000157   \n",
       "1010      ...       1.864383e-06  9.528454e-01  0.001478  0.000124   \n",
       "1011      ...       7.336830e-07  4.029945e-02  0.000208  0.142684   \n",
       "1012      ...       2.270889e-06  3.383372e-04  0.000702  0.000085   \n",
       "1013      ...       9.533243e-04  3.595905e-04  0.018908  0.006327   \n",
       "1014      ...       7.797042e-04  7.196264e-05  0.000640  0.000161   \n",
       "1015      ...       4.720976e-02  3.561534e-04  0.001372  0.000003   \n",
       "1016      ...       1.415338e-03  4.366391e-03  0.012436  0.000607   \n",
       "1017      ...       7.288985e-05  7.861063e-04  0.002966  0.000116   \n",
       "1018      ...       2.711413e-05  4.802085e-03  0.001749  0.005094   \n",
       "\n",
       "               X35           X36       X37           X38           X39  \\\n",
       "1     1.121387e-05  2.214182e-08  0.000038  2.896903e-03  1.724483e-04   \n",
       "2     3.533782e-04  1.145776e-03  0.307760  8.900101e-04  9.877170e-03   \n",
       "3     7.041272e-05  1.025539e-05  0.000181  1.981448e-04  1.036758e-03   \n",
       "4     5.095332e-06  4.110163e-06  0.000657  5.475914e-03  3.231665e-03   \n",
       "5     1.111954e-02  4.302147e-03  0.396293  3.225515e-03  1.853908e-03   \n",
       "6     6.336636e-05  6.437441e-05  0.030969  9.519957e-03  3.588646e-02   \n",
       "7     8.218961e-02  4.336913e-01  0.000287  7.205336e-03  2.120207e-04   \n",
       "8     2.077238e-05  2.165445e-07  0.000150  2.214874e-03  1.769736e-02   \n",
       "9     2.356348e-04  8.004961e-06  0.598019  6.536029e-03  4.789977e-04   \n",
       "10    5.669615e-04  3.215436e-05  0.000443  3.157324e-03  3.696998e-04   \n",
       "11    3.941418e-06  6.729796e-04  0.000022  4.303980e-06  2.919611e-06   \n",
       "12    4.670267e-01  3.282751e-04  0.000316  5.848442e-02  4.610525e-03   \n",
       "13    5.016687e-02  2.265492e-03  0.000475  6.066289e-04  3.293610e-05   \n",
       "14    2.382573e-04  1.739985e-04  0.000005  9.202287e-07  8.829141e-07   \n",
       "15    1.375314e-03  4.037606e-05  0.000115  2.322311e-02  1.585904e-04   \n",
       "16    4.788954e-04  1.927180e-04  0.000081  3.401634e-04  8.814712e-06   \n",
       "17    3.585470e-05  6.931998e-09  0.000011  8.414556e-04  8.204866e-05   \n",
       "18    2.414092e-05  2.241605e-04  0.000117  1.852975e-03  9.860627e-06   \n",
       "19    1.443225e-05  8.162189e-07  0.010267  4.983195e-03  2.062907e-04   \n",
       "20    1.488908e-04  2.062882e-07  0.000388  1.752389e-01  3.812483e-03   \n",
       "21    7.317483e-05  4.864050e-04  0.000775  2.858366e-03  1.434772e-03   \n",
       "22    1.162320e-05  2.170622e-05  0.000952  2.109324e-01  8.855499e-04   \n",
       "23    8.044500e-02  2.507706e-03  0.000997  5.242713e-03  3.255564e-04   \n",
       "24    1.447190e-01  2.125971e-04  0.000056  4.482416e-02  1.826577e-03   \n",
       "25    6.439716e-02  3.438558e-04  0.006632  1.389610e-03  1.649188e-03   \n",
       "26    1.260788e-03  1.339530e-06  0.000154  1.464510e-03  6.788238e-04   \n",
       "27    4.518394e-04  2.228438e-05  0.505957  1.775311e-01  4.629744e-03   \n",
       "28    1.555321e-03  1.895100e-05  0.005081  5.655665e-01  6.627563e-03   \n",
       "29    1.114799e-04  1.321053e-07  0.000005  1.624036e-04  6.946475e-05   \n",
       "30    6.335339e-07  7.866851e-07  0.000024  2.315143e-05  2.070584e-05   \n",
       "...            ...           ...       ...           ...           ...   \n",
       "989   3.039084e-04  7.061955e-02  0.000956  1.197361e-03  5.505902e-04   \n",
       "990   4.467826e-05  2.326206e-07  0.000003  8.367136e-06  1.962632e-05   \n",
       "991   1.227260e-05  2.235669e-05  0.000021  8.694560e-05  3.335596e-04   \n",
       "992   7.818998e-05  2.225525e-05  0.006670  6.836366e-03  8.351914e-03   \n",
       "993   5.739224e-06  5.847998e-07  0.000042  5.731538e-05  3.748539e-05   \n",
       "994   7.705729e-05  1.447279e-03  0.000282  6.025183e-05  1.193941e-04   \n",
       "995   7.560749e-06  4.668949e-06  0.000103  1.634232e-05  9.343600e-05   \n",
       "996   1.128253e-05  3.136667e-06  0.000584  2.718902e-03  6.211473e-03   \n",
       "997   2.088033e-06  6.038040e-06  0.002684  1.822640e-03  1.973596e-02   \n",
       "998   3.892161e-04  3.514173e-04  0.000185  8.381261e-04  3.930025e-05   \n",
       "999   4.415799e-06  1.441087e-05  0.000090  4.306749e-04  5.464089e-03   \n",
       "1000  7.529177e-05  7.949544e-06  0.000391  4.667414e-03  1.149062e-02   \n",
       "1001  6.493332e-05  4.460301e-06  0.005652  6.412765e-02  1.336864e-02   \n",
       "1002  3.431798e-02  1.842017e-04  0.000269  3.375430e-03  1.117519e-03   \n",
       "1003  3.638160e-05  2.399494e-07  0.000984  1.760869e-03  5.848419e-04   \n",
       "1004  5.263562e-05  1.127680e-07  0.000028  3.218346e-04  9.651502e-06   \n",
       "1005  3.385272e-06  1.105109e-06  0.000386  8.059869e-04  9.635969e-01   \n",
       "1006  1.248565e-03  1.089900e-04  0.000069  5.165846e-04  4.116590e-05   \n",
       "1007  1.174057e-05  1.035402e-05  0.000003  5.741842e-05  3.020070e-04   \n",
       "1008  4.996564e-04  2.361557e-07  0.000039  2.244743e-02  8.935311e-05   \n",
       "1009  1.829409e-01  7.312474e-04  0.002165  1.118205e-02  1.787384e-03   \n",
       "1010  4.428123e-08  7.758343e-08  0.000148  4.066575e-04  2.740341e-04   \n",
       "1011  1.241897e-06  4.875436e-07  0.000116  5.754603e-03  1.942943e-03   \n",
       "1012  1.426757e-06  1.483102e-07  0.000668  3.444811e-03  4.705313e-01   \n",
       "1013  5.120805e-03  6.866228e-05  0.000109  1.598283e-02  7.800131e-03   \n",
       "1014  1.775221e-06  8.746161e-07  0.000027  3.409313e-05  4.558465e-05   \n",
       "1015  1.206954e-06  1.257704e-04  0.000014  9.285064e-06  2.353426e-05   \n",
       "1016  5.722801e-03  6.286530e-04  0.001987  2.100345e-03  4.512580e-04   \n",
       "1017  7.850430e-04  3.332340e-05  0.000449  1.937276e-03  1.824477e-04   \n",
       "1018  9.978111e-06  2.281335e-06  0.000365  3.029369e-03  2.114945e-03   \n",
       "\n",
       "               X40  \n",
       "1     2.217323e-07  \n",
       "2     4.152077e-05  \n",
       "3     1.956936e-05  \n",
       "4     3.159558e-05  \n",
       "5     7.546477e-04  \n",
       "6     5.097306e-04  \n",
       "7     4.333721e-05  \n",
       "8     1.491251e-06  \n",
       "9     3.526054e-05  \n",
       "10    2.198869e-05  \n",
       "11    1.241344e-05  \n",
       "12    8.258727e-04  \n",
       "13    1.106561e-06  \n",
       "14    1.417024e-06  \n",
       "15    4.077263e-06  \n",
       "16    5.756271e-05  \n",
       "17    4.498704e-06  \n",
       "18    1.692545e-05  \n",
       "19    4.618381e-04  \n",
       "20    2.043010e-05  \n",
       "21    9.256029e-05  \n",
       "22    8.321648e-07  \n",
       "23    1.522500e-04  \n",
       "24    7.061188e-03  \n",
       "25    9.796699e-04  \n",
       "26    1.239667e-04  \n",
       "27    1.061444e-05  \n",
       "28    5.455298e-05  \n",
       "29    9.608991e-03  \n",
       "30    2.168287e-03  \n",
       "...            ...  \n",
       "989   1.464764e-06  \n",
       "990   2.693854e-02  \n",
       "991   3.654254e-01  \n",
       "992   4.198621e-04  \n",
       "993   5.592114e-03  \n",
       "994   1.733746e-04  \n",
       "995   1.591624e-01  \n",
       "996   2.163617e-06  \n",
       "997   2.097033e-04  \n",
       "998   3.989102e-07  \n",
       "999   1.342972e-05  \n",
       "1000  5.658331e-06  \n",
       "1001  6.239278e-04  \n",
       "1002  9.198406e-04  \n",
       "1003  2.864224e-05  \n",
       "1004  2.077392e-05  \n",
       "1005  1.008740e-05  \n",
       "1006  1.779840e-04  \n",
       "1007  1.189586e-03  \n",
       "1008  1.914580e-05  \n",
       "1009  2.538137e-04  \n",
       "1010  4.268418e-06  \n",
       "1011  6.965635e-07  \n",
       "1012  2.651383e-05  \n",
       "1013  1.178860e-04  \n",
       "1014  4.475873e-03  \n",
       "1015  8.135017e-01  \n",
       "1016  5.048902e-04  \n",
       "1017  1.278577e-05  \n",
       "1018  2.491901e-06  \n",
       "\n",
       "[1018 rows x 49 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn df_all (exported from R by the `%%R -o df_all` cell) into a pandas DataFrame\n",
    "import pandas\n",
    "\n",
    "df = pandas.DataFrame(data=df_all)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "########################################################\n",
    "#### Rename top 12 topics to match labels in Table 4 ####\n",
    "########################################################\n",
    "#\n",
    "# Each key is a topic-proportion column (X<k> = topic k of the STM theta\n",
    "# matrix merged into df); each value is the label used for that topic.\n",
    "# This dict is the single source of truth for the mapping.\n",
    "#\n",
    "# NOTE(review): an earlier comment legend in this cell listed\n",
    "#   X27 = Public Institutions, X28 = Hull House Practical Activities\n",
    "# (the reverse of what the code applied) and X10 = Women's Health.\n",
    "# The mapping that the code actually executed is preserved below unchanged;\n",
    "# confirm X27/X28 and the X10 label against Table 4.\n",
    "\n",
    "topic_labels = {\n",
    "    'X1': 'Hull House Social Activities',\n",
    "    'X28': 'Public Institutions',\n",
    "    'X27': 'Hull House Practical Activities',\n",
    "    'X8': 'Sanger and Birth Control',\n",
    "    'X26': \"Women's Lives\",\n",
    "    'X21': \"Women's Resistance\",\n",
    "    'X7': 'Anti-War',\n",
    "    'X9': 'Liberation School',\n",
    "    'X10': \"Women's Sexual Health\",\n",
    "    'X25': 'Forms of Resistance',\n",
    "    'X14': 'Movement Theory',\n",
    "    'X39': 'Movement History',\n",
    "}\n",
    "\n",
    "# rename() returns a new frame; rebinding df avoids mutating it in place.\n",
    "# Columns not listed above keep their X<k> names.\n",
    "df = df.rename(columns=topic_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='identify'></a>\n",
    "## Identify Documents"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can now sort the dataframe based on the desired topic to identify documents that best represent each topic. Below I do this for Topic 39, the \"Movement History\" topic, but this can be done for any topic."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>doc</th>\n",
       "      <th>text_string</th>\n",
       "      <th>Movement History</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>558</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>1 100 1972 1972 2 ARCS Alice Although An And A...</td>\n",
       "      <td>0.967030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1005</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>100 1959 Anthony Anthony Anthony Anthony Antho...</td>\n",
       "      <td>0.963597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>1 1928 1959 1965 1969 19th 19th 19th 19th 20th...</td>\n",
       "      <td>0.947335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>110 1848 1850 1870 1871 1902 1920 19th 19th 41...</td>\n",
       "      <td>0.940308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>630</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>19th Ages All Anthony Anthony Anthony Apparent...</td>\n",
       "      <td>0.915081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>1881 19th 19th 19th 2 29 3 4 Aithough American...</td>\n",
       "      <td>0.864953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>Along And At At Because Because Both But Fires...</td>\n",
       "      <td>0.864582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>19th 19th 19th 2506 75 94704 9Some All Also An...</td>\n",
       "      <td>0.816335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>17th 196Os A Although And And And Anne Bradstr...</td>\n",
       "      <td>0.756516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>1 1948 1972 2 2 3 7 772 969 A A AND America Am...</td>\n",
       "      <td>0.705227</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    doc  \\\n",
       "558   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "1005  nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "130   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "492   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "630   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "371   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "102   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "109   nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "31    nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "54    nyc.redstockings.1973.sarachild.powerofhistory...   \n",
       "\n",
       "                                            text_string  Movement History  \n",
       "558   1 100 1972 1972 2 ARCS Alice Although An And A...          0.967030  \n",
       "1005  100 1959 Anthony Anthony Anthony Anthony Antho...          0.963597  \n",
       "130   1 1928 1959 1965 1969 19th 19th 19th 19th 20th...          0.947335  \n",
       "492   110 1848 1850 1870 1871 1902 1920 19th 19th 41...          0.940308  \n",
       "630   19th Ages All Anthony Anthony Anthony Apparent...          0.915081  \n",
       "371   1881 19th 19th 19th 2 29 3 4 Aithough American...          0.864953  \n",
       "102   Along And At At Because Because Both But Fires...          0.864582  \n",
       "109   19th 19th 19th 2506 75 94704 9Some All Also An...          0.816335  \n",
       "31    17th 196Os A Although And And And Anne Bradstr...          0.756516  \n",
       "54    1 1948 1972 2 2 3 7 772 969 A A AND America Am...          0.705227  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Output used in Table 5:\n",
    "# the ten documents with the highest weight for Topic 39 ('Movement History').\n",
    "(\n",
    "    df[['doc', 'text_string', 'Movement History']]\n",
    "    .sort_values(by='Movement History', ascending=False)\n",
    "    .head(10)\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
